136 ◾ Bioinformatics
The following bash script downloads the standard human SNP and InDel VCF files, together
with their index files, into the “refvcf” subdirectory and then performs the two steps of BQSR.
Notice that for the BaseRecalibrator tool, the known variant files are provided with the
“--known-sites” option. The outputs of the base recalibration are stored in the “applyBQSR” subdirectory.
# Download the known-variant VCF files (1000G high-confidence SNPs and known
# InDels, hg38) plus their .tbi index files into the "refvcf" subdirectory.
# Each URL must be a single unbroken word; a URL split across lines is passed
# to wget truncated and the remaining pieces are run as separate commands.
mkdir -p refvcf   # -p: do not fail when the directory already exists
base_url="https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0"
for f in \
  1000G_phase1.snps.high_confidence.hg38.vcf.gz \
  1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi \
  Homo_sapiens_assembly38.known_indels.vcf.gz \
  Homo_sapiens_assembly38.known_indels.vcf.gz.tbi
do
  # -P saves into refvcf/ without changing the working directory; stop on the
  # first failed download so later steps never run with missing references.
  wget -P refvcf "${base_url}/${f}" || exit 1
done
##B- Build the BQSR model
#------------------------
# For every BAM file in RG/, run GATK BaseRecalibrator with the reference
# FASTA from refgenome/ and the two known-sites VCFs from refvcf/, writing one
# recalibration table per input BAM to BQSR/<name>.table.
mkdir -p BQSR
cd RG || exit 1   # abort rather than run the loop in the wrong directory

# Pick the reference FASTA via a glob instead of parsing `ls`; if several
# match, the first one is used.
ref_candidates=(../refgenome/*.fasta)
ref=${ref_candidates[0]}
if [[ ! -f "${ref}" ]]; then
  printf 'error: no reference .fasta found in ../refgenome\n' >&2
  exit 1
fi

for bam in *.bam; do
  [[ -e "${bam}" ]] || continue   # glob matched nothing: no BAM files present
  i=${bam%.bam}                   # file name without the .bam extension
  # Each --known-sites path must stay on one line: a path broken across lines
  # ends the gatk command early and runs the remainder as a new command.
  ~/software/gatk-4.2.3.0/gatk --java-options \
    "-Xmx4g" BaseRecalibrator \
    -I "${i}.bam" \
    -R "${ref}" \
    --known-sites ../refvcf/1000G_phase1.snps.high_confidence.hg38.vcf.gz \
    --known-sites ../refvcf/Homo_sapiens_assembly38.known_indels.vcf.gz \
    -O "../BQSR/${i}.table"
done
cd .. || exit 1
##C- Apply the model to adjust the base quality scores
#----------------------------------------------------
mkdir applyBQSR
cd RG
ref=$(ls ../refgenome/*.fasta)
for i in $(ls *.bam|rev|cut -c 5-|rev);
do
~/software/gatk-4.2.3.0/gatk \
--java-options \